# -*- coding: utf-8 -*-
# @Time        :2024/10/31 下午5:34
# @Author      :文刀水寿
# @File        : 04_数据计算_filter.py
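"""
 @Description : Use filter to keep only the elements you want.
 rdd.filter(func)
 Elements for which func returns True are kept; elements for which it returns False are dropped.
"""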
from pyspark import SparkConf, SparkContext
import os

# Tell PySpark which Python interpreter to use for its worker processes.
# NOTE(review): this is a machine-specific Windows path; adjust it for the
# machine the script runs on.
os.environ['PYSPARK_PYTHON'] = "D:/Python/python.exe"

# Configure a local-mode Spark application named "test_spark".
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)

try:
    # Prepare an RDD from an in-memory list.
    rdd = sc.parallelize([1, 2, 3, 4, 5])

    # Filter the RDD: only elements for which the predicate returns True
    # (here, the even numbers) are kept.
    rdd2 = rdd.filter(lambda num: num % 2 == 0)

    print(rdd2.collect())
finally:
    # Always shut the SparkContext down, even if the job above fails, so the
    # context's resources are released explicitly.
    sc.stop()
